# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script estimates the ATT and dynamic treatment effects for both transitions
#   using the Callaway and Sant'Anna estimator. Subsequent 
# 	do files combine these estimates with the TWFE estimates produced
#   in STATA to generate figures 3, 4, and 5.
# This script also generates the cohort specific treatment effects 
#   in Tables C.2 and C.3, and the dynamic treatment effects in
#   Tables C.6 and C.7.

pacman::p_load(
  did,
  openxlsx,
  dplyr,
  haven,
  tidyverse,
  estimatr)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
  dr_root <- paste0(arg1)
  dr_data <- paste0(dr_root, "/data")
  dr_output = paste0(dr_root, "/outputs")
  dr_output_main = paste0(dr_root, "/outputs/main_text")
  dr_output_app = paste0(dr_root, "/outputs/appendices")
  dr_output_log = paste0(dr_root, "/outputs/logs")
  dr_temp = paste0(dr_root, "/data/intermediate")
  setwd(dr_temp)

#----------------------------------------------# 
#------ Flood to Center Pivot or LEPA ---------#
#----------------------------------------------# 
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

#Filter the data to only include years before 2016 
wrg_flood_cplepa <- wrg_flood_cplepa %>% dplyr::filter(wua_year <= 2015)

#Create a new dataframe to store the results in 
  df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                  dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                        "estimate", "se_estimate", 
                                                      	"c_crit_val", "panel_mean_dep_var"))))
  #Make one version for the ATT's
  att_df <- df_template
  #Make another for the cohort effects
  grp_df <- df_template
  #Next version for the event study coefficients
  dyn_df <- df_template
  
  #List the dependent variables
  effect_order <- c("af_used", "acres_irr", "depth_applied")
  
  #Loop through dependent variables
  for (i in effect_order) {
    dep_var <- i
      #Make a df of just the dep var so we can get its mean value
      df_dep_var <- wrg_flood_cplepa %>% dplyr::select(all_of(dep_var))
      mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
    #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
    floodcplepa_attgt_nyt <- att_gt(yname = dep_var,
                                      tname = "wua_year",
                                      idname = "WR_GROUP",
                                      gname = "cohort_flood_cplepa",
                                      data = wrg_flood_cplepa,
                                      panel = TRUE,
                                      allow_unbalanced_panel = TRUE,
                                      xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                      est_method = "dr",
                                      control_group = c("notyettreated"),
                                      anticipation=0,
                                      clustervars = "WR_GROUP", 
                                      biters=1000
    )
    #Group specific effects to get overall ATT
    agg.gs <- aggte(floodcplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
      #Store ATT  
      att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
      #Store cohort effects
      agg.gs <- aggte(floodcplepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
      group_order <- c(agg.gs[["egt"]])
      g_est_lst <- c(agg.gs[["att.egt"]])
      g_se_lst <- c(agg.gs[["se.egt"]])
      g_c <- agg.gs$crit.val.egt
      for (i in 1:length(group_order)) {
        grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
      }

    #Dynamic effects
    agg.es <- aggte(floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
      #Store the att  
      att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
      #Store the event study coefficients
      agg.es <- aggte(floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
      effect_order <- c(agg.es[["egt"]])
      est_lst <- c(agg.es[["att.egt"]])
      se_lst <- c(agg.es[["se.egt"]])
      e_c <- agg.es$crit.val.egt
      for (i in 1:length(effect_order)) {
        dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
      }
  }
  #Create the 95% ci variables
  att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                               ci_95_ub = estimate + c_crit_val*se_estimate)
  dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  
  #Save all three datasets in one excel workbook
  dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
  write.xlsx(dataset_names, file = file.path(dr_temp, 'cs_floodtocporlepa_91_15_nyt.xlsx'))
  
  #Remove objects and free up memory
  rm(wrg_flood_cplepa, att_df, dyn_df, grp_df, agg.es, agg.gs)
  gc()
  
#----------------------------------------------# 
#---------- Center Pivot to LEPA --------------#
#----------------------------------------------# 
  wrg_cp_lepa <- read.csv(file = file.path(dr_temp, "cp_lepa_prepped.csv"))
  
  #Create a new dataframe to store the results in 
  df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                   dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                         "estimate", "se_estimate", 
                                                         "c_crit_val", "panel_mean_dep_var"))))
  #Make one version for the ATT's
  att_df <- df_template
  #Make another for the cohort effects
  grp_df <- df_template
  #Next version for the event study coefficients
  dyn_df <- df_template
  
  #List the dependent variables
  effect_order <- c("af_used", "acres_irr", "depth_applied")
  
  for (i in effect_order) {
    dep_var <- i
    #Make a df of just the dep var so we can get its mean value
    df_dep_var <- wrg_cp_lepa %>% dplyr::select(all_of(dep_var))
    mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
    #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
    cplepa_attgt <- att_gt(yname = dep_var,
                                          tname = "wua_year",
                                          idname = "WR_GROUP",
                                          gname = "cohort_cp_lepa",
                                          data = wrg_cp_lepa,
                                          panel = TRUE,
                                          allow_unbalanced_panel = TRUE,
                                          xformla=~hyd_cond+predev_sat+sandtotal+silttotal+init_af_used,
                                          est_method = "dr",
                                          control_group = c("notyettreated"),
                                          anticipation=0,
                                          clustervars = "WR_GROUP", 
                                          biters=1000
    )
    #Group specific effects to get overall ATT
    agg.gs <- aggte(cplepa_attgt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
      #Store ATT  
      att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
      #Store cohort effects
      agg.gs <- aggte(cplepa_attgt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
      group_order <- c(agg.gs[["egt"]])
      g_est_lst <- c(agg.gs[["att.egt"]])
      g_se_lst <- c(agg.gs[["se.egt"]])
      g_c <- agg.gs$crit.val.egt
      for (i in 1:length(group_order)) {
        grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
      }
    
    #Dynamic effects
    agg.es <- aggte(cplepa_attgt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
      #Store the att  
      att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
      #Store the event study coefficients
      agg.es <- aggte(cplepa_attgt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
      effect_order <- c(agg.es[["egt"]])
      est_lst <- c(agg.es[["att.egt"]])
      se_lst <- c(agg.es[["se.egt"]])
      e_c <- agg.es$crit.val.egt
      for (i in 1:length(effect_order)) {
        dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
      }
  }
  #Create the 95% ci variables
  att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                               ci_95_ub = estimate + c_crit_val*se_estimate)
  dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                               ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
  
  #Save as excel workbook
  dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
  write.xlsx(dataset_names, file = file.path(dr_temp, 'cs_cptolepa.xlsx'))
  
 